import os
import time

from Congressgov.utils.environ import check_environ

# Scrapy settings for Congressgov project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
#     https://docs.scrapy.org/en/latest/topics/settings.html
#     https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
#     https://docs.scrapy.org/en/latest/topics/spider-middleware.html


# Project / bot name.
BOT_NAME = "Congressgov"

# New spiders are generated into this package; it is also the only package
# listed for spider discovery.
NEWSPIDER_MODULE = "Congressgov.spiders"
SPIDER_MODULES = [NEWSPIDER_MODULE]

# Crawl responsibly by identifying yourself (and your website) on the user-agent
# USER_AGENT = 'Congressgov (+http://www.yourdomain.com)'

# Obey robots.txt rules
# ROBOTSTXT_OBEY = True

# Maximum number of concurrent requests performed by Scrapy (Scrapy default: 16).
CONCURRENT_REQUESTS = 32

# Delay between requests to the same website (Scrapy default: 0).
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
DOWNLOAD_DELAY = 0.5
# The download delay setting will honor only one of:
# CONCURRENT_REQUESTS_PER_DOMAIN = 16
# CONCURRENT_REQUESTS_PER_IP = 16

# Disable cookies (enabled by default)
# COOKIES_ENABLED = False

# Disable Telnet Console (enabled by default)
# TELNETCONSOLE_ENABLED = False

# Override the default request headers:
# DEFAULT_REQUEST_HEADERS = {
#   'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
#   'Accept-Language': 'en',
# }

# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
# SPIDER_MIDDLEWARES = {
#    'Congressgov.middlewares.CongressgovSpiderMiddleware': 543,
# }

# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html


# Environment switch, evaluated once when the settings module is imported.
# Several sections of this file branch on its truthiness.
# NOTE(review): a truthy value appears to select the proxy-backed deployment
# profile -- confirm against Congressgov.utils.environ.check_environ.
ENVIRON_ = check_environ()

# The random User-Agent middleware is enabled in both branches.
DOWNLOADER_MIDDLEWARES = {
    "Congressgov.middlewares.RandomUserAgentMiddleware": 300,
}
if ENVIRON_:
    # Truthy ENVIRON_: add the Redis proxy middleware and the proxy variant
    # of the exception-handling middleware.
    DOWNLOADER_MIDDLEWARES.update({
        "Congressgov.middlewares.Proxy_Redis_DownloaderMiddleware": 320,
        "Congressgov.middlewares.ProcessAllException_ProxyMiddleware": 510,
    })
else:
    # Falsy ENVIRON_: no proxy middleware, plain exception-handling middleware.
    DOWNLOADER_MIDDLEWARES[
        "Congressgov.middlewares.ProcessAllExceptionMiddleware"
    ] = 500

# Entries that were listed but commented out in both branches:
#   'Congressgov.middlewares.CongressgovDownloaderMiddleware': 1
#   'Congressgov.middlewares.Proxy_DownloaderMiddleware': 310

# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
# EXTENSIONS = {
#    'scrapy.extensions.telnet.TelnetConsole': None,
# }

# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html

# Enabled item pipelines, keyed by import path; the integer is the ordering
# value Scrapy uses to sequence them.
ITEM_PIPELINES = {
    "Congressgov.pipelines.MYSQLPipeline": 300,
    "Congressgov.pipelines.MyFileUpdatePipeline": 310,
    "Congressgov.pipelines.MyImageUpdatePipeline": 320,
}
# Entries that were listed but commented out:
#   'Congressgov.pipelines.CongressgovPipeline': 300
#   'Congressgov.pipelines.MyFilePipeline': 330
#   'Congressgov.pipelines.MyImagePipeline': 340

# Randomized delay mechanism: per the Scrapy settings documentation, the wait
# between requests is drawn from 0.5x to 1.5x of DOWNLOAD_DELAY.
RANDOMIZE_DOWNLOAD_DELAY = True
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
# AUTOTHROTTLE_ENABLED = True
# The initial download delay
# AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
# AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
# AUTOTHROTTLE_DEBUG = False

# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
# HTTPCACHE_ENABLED = True
# HTTPCACHE_EXPIRATION_SECS = 0
# HTTPCACHE_DIR = 'httpcache'
# HTTPCACHE_IGNORE_HTTP_CODES = []
# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'

# Local-time date stamp (YYYY_MM_DD), captured once when the settings load.
# time.strftime() without a time tuple formats the current local time.
CURRENT_DATE = time.strftime("%Y_%m_%d")
# ----------------------------------------------------------- log ------------------------------------------------------
# Directory that contains this settings module.
PROJECT_PATH_ = os.path.dirname(__file__)
# Log directory located next to the settings module; created on import.
LOG_DIR = os.path.join(PROJECT_PATH_, 'log')
# exist_ok=True replaces the former exists()/makedirs() pair, which could
# raise FileExistsError when two crawler processes started at the same time
# and both saw the directory as missing.
os.makedirs(LOG_DIR, exist_ok=True)
# LOG_FILE=os.path.join(PROJECT_PATH_, '{}.log'.format())
# LOG_ENABLED = True
# # LOG_STDOUT = True
#
# LOG_FORMAT = '%(asctime)s [%(name)s] %(levelname)s: %(message)s'
# LOG_DATEFORMAT = '%Y-%m-%d %H:%M:%S'
# # Log level
# LOG_LEVEL = 'INFO'

# --------------------------------------------------------- File download storage configuration ------------------------
FILE_DB = 2
FILE_SET = "FILES"

# Root path for files on the initial crawl. This is the only storage path
# that differs between the two ENVIRON_ branches.
FILES_STORE = "D:/congress_files_img" if ENVIRON_ else "D:/congress"
# Root path for files on update crawls.
FILES_STORE_UPDATE = "D:/congress_update/"
# Root path for images on the initial crawl.
IMAGES_STORE = "D:/congress"
# Root path for images on update crawls.
IMAGES_STORE_UPDATE = "D:/congress_update/"
# Root path for SQL output on update crawls.
SQL_STORE = "D:/congress_update"
# Obsolete: formerly used for storing converted files.
# NEW_FILE_STORE = "D:/FILE_STORE_{}"
# Per-day working directory under SQL_STORE; created on import.
CURRENT_DIR = os.path.join(SQL_STORE, CURRENT_DATE)
# exist_ok=True replaces the former exists()/makedirs() pair, which could
# raise FileExistsError when two crawler processes started at the same time
# and both saw the directory as missing.
os.makedirs(CURRENT_DIR, exist_ok=True)
# Files dated more than this many days before the current date will be deleted.
EXPIRATION_DAY = 30
# --------------------------------------------------------- MySQL configuration ----------------------------------------
MYSQL_HOST, MYSQL_PORT = "127.0.0.1", 3306

# NOTE(review): credentials are hard-coded in source control; consider
# loading them from the environment or a secrets store.
if ENVIRON_:
    MYSQL_USER, MYSQL_PWD, MYSQL_ROOT_PWD = "zyyt", "Pwd@123.cn", "Pwd@123"
else:
    # The application account is root in this branch, so the root password
    # is the same value as the application password.
    MYSQL_USER = "root"
    MYSQL_ROOT_PWD = MYSQL_PWD = "yongle95#"

MYSQL_DATABASE = "congress"
MYSQL_CHARSET = "utf8mb4"
MYSQL_TABLE_TASK_URL = "congress_task_url"

# ----------------------------------------------------------- Database table names -------------------------------------

# Legislation.
MYSQL_LEGISLATION_TABLE = "congress_legislation"
# MYSQL_LEGISLATION_TM_TABLE = 'congress_legislation_tm'

# Committee material.
MYSQL_COMMITTEE_REPORT_TABLE = "congress_committee_report"
MYSQL_COMMITTEE_MEETINGS_TABLE = "congress_committee_meetings"
MYSQL_COMMITTEE_PUBLICATIONS_TABLE = "congress_committee_publications"

# Record, members, nominations and treaty documents.
MYSQL_RECORD_TABLE = "congress_record"
MYSQL_MEMBERS_TABLE = "congress_members"
MYSQL_NOMINATIONS_TABLE = "congress_nominations"
MYSQL_TREATY_DOCUMENTS_TABLE = "congress_treaty_documents"

# House and Senate communications.
MYSQL_HOUSE_COMMUNICATIONS_TABLE = "congress_house_communications"
MYSQL_SENATE_COMMUNICATIONS_TABLE = "congress_senate_communications"

# New table into which database table data is transferred.
CURRENT_DAY_TABLE = "congress_legislation_tmp"

# -------------------------------------------------------- Redis configuration -----------------------------------------
REDIS_HOST, REDIS_PORT, REDIS_DB = "127.0.0.1", 6379, 0

# NOTE(review): passwords are hard-coded in source control; consider loading
# them from the environment or a secrets store.
REDIS_PWD = "3DV9EUwX@@" if ENVIRON_ else "yongle95#"

# Connection parameters carrying the same password as REDIS_PWD.
# NOTE(review): these select db 1 while REDIS_DB above is 0 -- confirm which
# consumer reads which setting.
REDIS_PARAMS = {
    "password": REDIS_PWD,
    "db": 1,
}
# Redis keys under which the proxy pools are stored.
PROXY_POOL = "PROXY_POOL"
PROXY_POOL_SOCKS5 = "PROXY_POOL_SOCKS5"
PROXY_POOL_ERROR = "PROXY_POOL_ERROR"
PROXY_POOL_ERROR_SOCKS = "PROXY_POOL_ERROR_SOCKS"

# URLs used for de-duplication.
REDIS_DB_SET = 2
LEGISLATION_KEY = "congress_legislation"
COMMITTEE_REPORT_KEY = "congress_committee_report"
COMMITTEE_MEETINGS_KEY = "congress_committee_meetings"
COMMITTEE_PUBLICATIONS_KEY = "congress_committee_publications"
RECORD_KEY = "congress_record"
MEMBERS_KEY = "congress_members"
NOMINATIONS_KEY = "congress_nominations"
TREATY_DOCUMENTS_KEY = "congress_treaty_documents"
HOUSE_COMMUNICATIONS_KEY = "congress_house_communications"
SENATE_COMMUNICATIONS_KEY = "congress_senate_communications"

# Upper limit on the number of retries after an exception.
ERROR_TIME_LIMIT = 15

# Both proxy lists contain the same addresses in the same order and differ
# only in URL scheme and port, so they are generated from one address tuple.
# The helper names are lowercase on purpose: Scrapy only loads upper-case
# module attributes as settings.
_proxy_suffix = "@104.238.133.214:1490"

_proxy_addresses = (
    "185.102.113.220",
    "185.102.113.97",
    "185.61.219.240",
    "212.115.51.233",
    "185.81.144.57",
    "195.133.27.79",
    "141.98.84.161",
    "77.220.192.225",
    "212.193.3.146",
    "194.58.68.240",
    "83.142.53.236",
    "213.232.121.49",
    "194.104.10.28",
    "62.76.233.65",
    "194.87.35.60",
    "45.148.232.243",
    "185.81.145.53",
    "194.87.35.207",
    "185.61.220.32",
    "194.87.39.34",
    "194.87.39.40",
    "77.220.194.175",
    "185.81.144.28",
    "185.61.220.132",
    "185.81.145.214",
    "185.81.145.158",
    "185.250.47.84",
    "5.181.169.154",
    "185.61.220.41",
    "45.140.207.202",
    "77.220.192.189",
    "37.44.252.254",
    "185.102.112.161",
    "213.108.3.168",
    "194.156.124.55",
    "194.87.34.163",
    "217.145.226.231",
    "45.140.206.52",
    "45.80.104.153",
    "194.104.8.234",
    "194.87.34.202",
    "185.68.246.90",
    "37.9.44.24",
    "5.181.169.98",
    "195.133.27.98",
    "185.96.37.148",
    "79.110.31.133",
    "194.87.35.174",
    "213.108.0.216",
    "45.10.164.21",
)

# HTTP entries: "http://<address>:8085@104.238.133.214:1490".
PROXIES_LIST = [
    "http://{}:8085{}".format(address, _proxy_suffix)
    for address in _proxy_addresses
]

# SOCKS5 entries: "socks5://<address>:1085@104.238.133.214:1490".
PROXIES_LIST_SOCKS5 = [
    "socks5://{}:1085{}".format(address, _proxy_suffix)
    for address in _proxy_addresses
]
# -------------------------------------- scrapy-redis configuration ----------------------------------------------------
# Scheduler and duplicate filter are both the scrapy-redis implementations.
SCHEDULER = "scrapy_redis.scheduler.Scheduler"
DUPEFILTER_CLASS = "scrapy_redis.dupefilter.RFPDupeFilter"
# Per the scrapy-redis documentation, persisting keeps the Redis queues
# instead of cleaning them up, so a crawl can be paused and resumed.
SCHEDULER_PERSIST = True
